* This file calculates the optimal polynomial order to be used to produce column 4 of Table A2.

use "$saveddata/data_complete_withcosts.dta", clear


* collapse for analysis
collapse (count) freq = aeattenddisp (mean) tcount icount admit discharge* ///
		inpat_los = spell_los inpat_proc = number_proc ///
		death30 death90 death365 othexit ///
		d_* costem30_all costae30_all cost30_aeem, by(bin)
		
egen sumfreq = sum(freq)
gen pfreq = freq/sumfreq
drop sumfreq

* exclusions: remove post-600 minutes (not relevant to polynomial fit)
drop if bin>600

* bin polynomial variables
forv x = 1/15 {
	gen bin`x' = bin^`x'
	}

* split into 60 minute blocks
gen period = 1 if bin<=60
replace period = 2 if bin>60 & bin<=120
replace period = 3 if bin>120 & bin<=180
replace period = 4 if bin>240 & bin<=300
replace period = 5 if bin>300 & bin<=360
replace period = 6 if bin>360 & bin<=420
replace period = 7 if bin>420 & bin<=480
replace period = 8 if bin>480 & bin<=540
replace period = 9 if bin>540 

* parameters
local start = 180
local endoh = 260
mat parameters = J(1,15,.)		// columns = 2 + number of outcomes

* results matrix
mat results = J(26,13,.)

* wait time parameters
forv porder = 3/15 {
	forv endloop = 250(10)500 {

		* drop variables created in loop
		cap drop pfreq_cf diff
		
		* compute predictions
		cap qui reg pfreq bin1-bin`porder' if (bin<`start' | bin>`endloop') 
		
		* store results
		local endloopi = `endloop'/10 - 24
		local porderi = `porder' - 2

		if _rc==506 {
			mat results[`endloopi',`porderi'] = .
			}
			
		else {
			qui predict pfreq_cf, xb

			* difference in mass
			qui gen diff = pfreq - pfreq_cf 
			qui su diff if bin>=`start' & bin<=`endloop'
			*di "Poly: `porder' / End loop: `endloop' / Diff: `r(sum)'"
			mat results[`endloopi',`porderi'] = `r(sum)'
			}
		}
	}
	
* other outcome parameters
local outcomei = 3
foreach var of varlist admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365 {
	matrix oresults = J(9,13,.)
	forv porder = 3/15 {
		forv x = 1/9 {
			qui { 
				cap reg `var' bin1-bin`porder' if (bin<`start' | bin>`endoh') & period!=`x' 
				
				local i = `porder' - 2
				
				if _rc==506 {
					mat oresults[`x',`i'] = .
					}
				else {
					predict `var'hat if period==`x', xb
					gen error2 = (`var' - `var'hat)^2
					su error2
					mat oresults[`x',`i'] = r(sum)
					drop `var'hat error2
					}
				}
			}
		}
	
	mata : st_matrix("mse", colsum(st_matrix("oresults")))

	local min = 1
	local porder = 3
	forv x = 2/13 {
		if mse[1,`x']<mse[1,`min'] {
			local porder = `x' + 2
			local min = `x'
			}
		}

	mat parameters[1,`outcomei'] = `porder'
	local outcomei = `outcomei' + 1
	}
	
* combine results
clear
svmat results
foreach var of varlist results* {
	replace `var' = abs(`var')
	}
egen rowmin = rowmin(results*)
gen end = (_n + 24)*10
sort rowmin

local end = end[1]
local pcount = 3
foreach var of varlist results* {
	if `var'[1]==rowmin[1] {
		local poly = `pcount'
		}
	local pcount = `pcount' + 1
	}

mat parameters[1,1] = `end'
mat parameters[1,2] = `poly'
drop rowmin end

* create lookup
clear
svmat parameters
rename parameters1 end
rename parameters2 polystar_wait

local varcount = 3
foreach var in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365  {
	rename parameters`varcount' polystar_`var'
	local varcount = `varcount' + 1
	}
	
order end polystar_wait polystar_*


save "$saveddata/lookup_polystar_agg.dta", replace



